

*This program computes instruments for the sensitivity analysis using different values of rho (Table B.5)



* Stage the bilateral trade flows: load them, order by year-origin-sector,
* and park the result in temp.dta for the merge at the end of this section.
use "oecd_gravity_data_intermediates.dta", clear
sort year originId sectorId
save "temp.dta", replace

* Trade elasticities: the GYY estimates, with the PML estimate used as theta.
use "theta_estimates_gyy.dta", clear
rename theta_gyy_pml theta
keep sectorId theta se_gyy

* Sectors with sectorId below 3 (non-manufacturing) do not keep their own
* estimate: they receive the median theta of the remaining sectors.
* The cond() blanks out their values so they do not enter the median.
egen theta_median = median(cond(sectorId < 3, ., theta))
replace theta = theta_median if sectorId < 3
drop theta_median

* Attach the elasticities to every trade-flow record of the same sector.
merge 1:m sectorId using "temp.dta"
drop _merge


* Add the Nunn aggregates, matched on countryISO. Records that exist only in
* the Nunn file are not retained.
merge m:1 countryISO using "nunn_aggregate.dta", keep(master match)
drop _merge

sort originId destId sectorId year
* Discard the non-traded sectors: anything with sectorId of 18 or more
* (a missing sectorId also falls in this range and is discarded).
drop if sectorId >= 18

*Generating share of labor force by sector-year
* The share is built from X: flows of an origin-sector-year (X_s) divided by
* the flows of that origin-year summed over all retained sectors (X_tot).
* The denominator is therefore grouped by year and origin only. Grouping it by
* origin and sector would pool years within one sector and would not yield a
* share across sectors.
egen X_s = total(X), by(year originId sectorId)
egen X_tot = total(X), by(year originId)
gen logL_share = log(X_s/X_tot)
drop X_s X_tot

*drop non-traded sectors (sectorId 18 and above) and the non-manufacturing
*sectors (sectorId below 3); a missing sectorId is dropped as well
keep if sectorId>2 & sectorId<18

*Generating share of MANUFACTURING labor force by sector-year
* Same construction as above, but the origin-year total now covers only the
* sectors that survived the restriction just applied.
egen X_s = total(X), by(year originId sectorId)
egen X_tot = total(X), by(year originId)
gen logL_share_man = log(X_s/X_tot)
drop X_s X_tot


* Overwrite temp.dta with the restricted data so that later merges pick up
* both share variables.
save temp.dta, replace
**************************************************************************
* Sectoral price indices
**************************************************************************

* Log bilateral flows. Records without a usable log flow are removed before
* any expenditure total is formed.
generate logX = log(X)
drop if logX == .

* Expenditure of each destination-sector-year: the total over all flows, and
* the consumption-only total (X_C) needed for the estimation with
* intermediate goods.
egen E   = total(X),   by(destId sectorId year)
egen E_C = total(X_C), by(destId sectorId year)
generate logE   = log(E)
generate logE_C = log(E_C)

*******************
* Baseline values of theta
*******************
* Log share of each flow in the destination's sectoral expenditure, divided
* by the trade elasticity of the sector.
generate ts_theta = (logX - logE) / theta
* Price index of a destination-sector-year: the mean of those scaled log
* shares over all records of that cell.
egen logprice_base = mean(ts_theta), by(year destId sectorId)

* Reduce to the own-trade records and to the destination-level variables,
* then remove exact duplicate rows.
drop if originId != destId
keep year sectorId destId logprice* logE logE_C pop
sort year destId sectorId
duplicates drop

generate logpop = log(pop)
egen clusterId = group(destId)
 

* CES instruments

* Elasticity values considered: baseline, low, high, and the unit case (cd),
* for which the price term below has exponent zero.
local sigma_base 0.87
local sigma_low  0.1
local sigma_high 3
local sigma_cd   1

* For every elasticity value:
*   beta_*            share of a sector in the destination-year sum of
*                     expenditure divided by the price level raised to
*                     (1 - sigma), over sectors with sectorId above 2
*   instrument_*      log beta plus log population
*   instrument_pure_* log beta alone (demand shocks only)
foreach spec in base high low cd {
	generate ces_term = exp(logE) / (exp(logprice_base))^(1 - `sigma_`spec'') if sectorId > 2
	egen ces_total = total(ces_term), by(destId year)
	generate beta_`spec' = ces_term / ces_total
	generate instrument_`spec' = log(beta_`spec') + logpop
	generate instrument_pure_`spec' = log(beta_`spec')
	* The two working variables are rebuilt on every pass of the loop.
	drop ces_term ces_total
}
	
**************************************************************************
* Merge the instruments back into the main dataset. The destination
* identifier is relabelled as the origin, so each instrument is matched to
* the records in which that country is the exporter.
keep sectorId year destId logprice_base instrument*
rename destId originId

merge 1:m originId sectorId year using "temp.dta", nogenerate

save "int_reg_data_sensitivity_rho.dta", replace


* Reshape the price indices to one column per sector (logprice_base plus the
* sectorId value), using the own-trade record of each origin-sector-year.
drop if originId != destId
keep originId year sectorId logprice_base
reshape wide logprice_base, i(originId year) j(sectorId)

merge 1:m originId year using "int_reg_data_sensitivity_rho.dta", nogenerate

* Only the per-sector columns are carried forward. The long-form variable
* that came back with the merge is removed.
drop logprice_base
save "int_reg_data_sensitivity_rho.dta", replace


**************************************************************************
* Dependent and independent variables for the regressions
**************************************************************************

* Dependent variable (no intermediates): log flow divided by theta
generate logX_theta = log(X) / theta

* Independent variables, baseline case without intermediates:
* total flows of the origin-sector-year, in logs, and the same total divided
* by nominal_wage_go, in logs.
egen X_s = total(X), by(year originId sectorId)
generate logX_s = log(X_s)
generate logL = log(X_s / nominal_wage_go)
label variable logL "Log L_ik under the baseline assumption of labor as the only factor of production"

* Plain log flow, then trim the file to the variables that are carried into
* the regression dataset.
generate logX = log(X)
keep originId destId sectorId year countryISO ///
	instrument* logX* logL* ///
	Qc ln_credit_banks real_wage_pwt ///
	theta* pop logprice_base* se_gyy

* Identifiers first, rows ordered by the same identifiers.
sort originId destId sectorId year
order originId destId sectorId year

save "regression_data_sensitivity_rho.dta", replace






 
 
 


